


*********************************************************
*
*merge to main firm-level panel dataset
*
*********************************************************
use "../Raw/GCEL_Company_list/fuzzy_matching/GCEL_Parent_Id", clear
keep GCELcompanyID Parentid
rename Parentid ParentID_GCEL
tempfile GCEL_parent_company
save `GCEL_parent_company'

use "../Intermediate/analysis_panel_firmyear.dta",clear
joinby ParentID_GCEL using `GCEL_parent_company'
tempfile company_year_tomerge
save `company_year_tomerge'

use "../Raw/GCEL_Company_list/fuzzy_matching/GCPT_GCEL_merge.dta",clear  
keep TrackerID GCELcompanyID
tempfile GCEL_GCPT
save `GCEL_GCPT'


use "../Intermediate/gcpt_clean.dta", clear
merge m:1 TrackerID using `GCEL_GCPT', nogen keep(3)
duplicates tag Plant Unit, gen(tag)
drop if tag >= 1 


*EIA USA Data
merge 1:1 Plant Unit using "../Intermediate/usa_eia_generator_data.dta", keep(1 3) nogen

save "../Intermediate/analysis_plant_plans.dta", replace

*Bring in Wiki data
include "../Data_Cleaning/clean_gem_retirement_scraping.do" 

use "../Intermediate/analysis_plant_plans.dta", clear 
merge 1:1 TrackerID using "../Intermediate/gem_wiki_announcement_match", keep(1 3) nogen

**convert to 09-21 panel data format
gen start_year = 2009
gen end_year = 2021
expand end_year - start_year + 1
by TrackerID, sort: gen year = start_year + _n - 1
drop *_year
sort TrackerID year

joinby GCELcompanyID year using `company_year_tomerge'
duplicates drop
order TrackerID ParentID_GCEL GCELcompanyID year

*********************************************************
*
*CLEAN UP PANEL FOR ANALYSIS
*
*********************************************************

*Convert all first_dates to year level data
foreach var of varlist first_date_* {
    gen `var'_year = year(`var')
	drop `var'
}

egen plant_parent_id=group(TrackerID ParentID_GCEL)
egen plant_id = group(TrackerID)
egen facility_id = group(TrackerLOC)
egen parent_id = group(ParentID_GCEL)
gen plant_age = year-first_date_Operating_year


label var plant_age "Plant Age"

encode Coaltype, gen(coaltype_i)
gen coaltype_anth = regexm(Coaltype,"anth") | regexm(Coaltype,"hard")
gen coaltype_bitum = regexm(Coaltype,"bitum")
gen coaltype_lignite = regexm(Coaltype,"lignite")
gen coaltype_subbit = regexm(Coaltype,"sub-bit")
gen coaltype_broad = "Anthracite" if coaltype_anth == 1
replace coaltype_broad = "Bituminous" if coaltype_bitum == 1
replace coaltype_broad = "Sub-Bituminous" if coaltype_subbit == 1
replace coaltype_broad = "Lignite" if coaltype_lignite == 1
replace coaltype_broad = "Unknown" if missing(coaltype_broad)
encode coaltype_broad, gen(coaltypebroad_i)


*********************************************************
*
*Defining Retirement and Mothball
*
*********************************************************
/*first_date_Retired and Retired Yr only differ for plants G101416:G101421, but first_date_Retired has slightly more data*/

*Retire (This definition excludes mothballed plants)
gen retired = year >= first_date_Retired_year

*distressed includes plants that are retired or mothballed since 2014
gen distressed = year >= first_date_Retired_year | year >= first_date_Mothballed_year

*********************************************************
*
*Defining Announcement, Shelved, and Cancelation
*
*********************************************************

gen canceled = year >= first_date_Cancelled_year
gen constructed = year >= first_date_Operating_year
gen can_or_retired = max(canceled, retired)



*********************************************************
*
*Creating cross sectional treatment varaibles 
*
*********************************************************
gen assets_2015 = Totassets if year == 2015
bys borrower_id (assets_2015): replace assets_2015 = assets_2015[1]
egen hassomeasset = max(~missing(Totassets)), by(borrower_id)
egen assets_cx = median(Totassets), by(plant_parent_id)

xtile decile_assets_2015 = assets_2015, n(10)
gen large_firm = decile_assets_2015 >= 5 if ~missing(decile_assets_2015)
gen share_high = share > 0.588 if ~missing(share)
gen share_low = 1-share_high


save "../Intermediate/analysis_panel_plantyear.dta",replace


******************************************************
***Setting up Panel for Regressions
******************************************************
use "../Intermediate/analysis_panel_plantyear.dta", clear

bys plant_parent_id: gen cx = _n == 1
sum ss_pre_all_debt_ban_intnstyn1_mx if cx == 1 & ss_pre_all_debt_ban_intnstyn1_mx > 0, de
gen ban_intensity_max_sd = ss_pre_all_debt_ban_intnstyn1_mx/`r(sd)'
gen big_plant = CapacityMW >= 500
gen small_firm = median_assets_mean == 1

rename post15 post2015 
label var post2015 "$ Year \geq 2015$"
label var large_firm "Large Firm"
label var share_low "Low Coal Share"
label var ban_intensity_max_sd "$\text{Bank Exit Exposure (Max)}_{f}$"
label var small_firm "Small Firm"
label var big_plant "Large Plant"



save ../Intermediate/plant_year_panel_clean, replace
